% HYPOTHETICAL EXPERIMENT/SIMULATION
% N subjects are followed up for a total of `time` years and measured every dt years
% p1 the risk of developing disease when exposed
% p0 the risk of developing disease when not exposed
% e = probability of being exposed
% we assume that there is no loss to follow-up

%%%% Paper simulation scenario II: In scenario II we assumed that disease D increased the probability of being exposed by ten times, but that exposure E did not increase the risk of disease (i.e. reverse causation). 

clear all

% ---- Simulation parameters -------------------------------------------------
N  = 4000;    % number of subjects
e  = 0.1;     % probability of getting exposed; equal to pe in the article
RR = 1;       % the relative risk associated with the exposure
p0 = 0.01;    % the risk of developing disease when not exposed
p1 = RR*p0;   % the risk of developing disease when exposed
dt = 1;       % measurement interval in years

% Factor by which having the disease multiplies the probability of getting exposed.
% NOTE(review): the scenario II description speaks of a tenfold increase, but
% the value used here is 5 - confirm which one is intended before publishing results.
de = 5;

% Number of time columns: 5 follow-up steps plus one extra column, because
% column 1 of every matrix represents time 0.
time = 5 + 1;

% ---- Preallocation ---------------------------------------------------------
% subject(s,t,layer) holds the state of subject s at time column t:
%   layer 1: baseline probability of developing disease (p0)
%   layer 2: exposure indicator
%   layer 3: disease indicator
%   layer 4: follow-up time
%   layer 5: probability of getting exposed
subject = zeros(N,time,5);
subject(:,:,1) = p0;
subject(:,:,5) = e;

% Matrix for the biological age at a certain moment in time.
age_at_date = zeros(N,time);

% ted: column 1 is the time of first exposure, column 2 the time of disease
% onset. zeros() already provides the default of 0 (no exposure / no disease).
ted = zeros(N,2);

% ted2: column 1 is the biological age at first exposure, column 2 the
% biological age at disease onset.
ted2 = zeros(N,2);

% Initialize the random number generator to make the results repeatable.
rng(0,'twister');

% Simulate disease and exposure histories for every subject.
%
% The loops are deliberately not vectorised and keep drawing after an event
% has occurred: the order and number of random draws determine the results
% obtained from the seeded generator.

% Difference in biological age (years) between the oldest and the youngest
% person from the subject() cohort. Loop-invariant, so defined once here.
d = 20;

for s=1:N  % s is the subject number

    %%% Disease (layer 3) for subject s over all time columns. In scenario II
    %%% exposure has no influence on disease risk, so only layer 1 is used.
    for t=1:dt:time  % column 1 represents time 0
        lambda = subject(s,t,1);
        B = binornd(dt,lambda);  % number of events in dt trials
        % At least one event within the interval means disease onset.
        % (The former test B>=dt demanded an event in every trial, which is
        % only equivalent when dt == 1.)
        if B>=1
            subject(s,t:time,3) = 1;  % diseased from this column onwards
        end
    end

    %%% Exposure (layer 2) for subject s; disease raises the probability of
    %%% getting exposed (reverse causation).
    for t=1:dt:time
        if subject(s,t,3)~=0
            % Subject has the disease: multiply the exposure probability by
            % de, capped at 1 because binornd returns NaN for p > 1 and the
            % comparison below would then silently never fire.
            subject(s,t,5) = min(subject(s,t,5)*de, 1);
        end
        lambda = subject(s,t,5);

        B = binornd(dt,lambda);
        if B>=1
            subject(s,t:time,2) = 1;  % exposed from this column onwards
        end

        subject(s,t,4) = t-1;  % layer 4: follow-up time (column 1 is time 0)
    end

    % Time in subject() represents time since birth, i.e. the biological age
    % of a specific birth cohort. To simulate a random group of people with
    % different biological ages at one moment in time, a random offset
    % between 0 and d years is added to every individual's age.
    r = d*rand(1);
    age_at_date(s,:) = subject(s,:,4)+r;

    % ted2 shifts the event times in ted by the same age offset. ted is not
    % modified anywhere in this loop (the waiting-time code that used to fill
    % it is disabled), so both columns of ted2 equal ted(s,:) + r.
    ted2(s,:) = ted(s,:)+r;

end

% s = (1:N);

%plot(s,ted(:,1),'-',s, ted(:,2),'--');

%%%%%% Path to infodynamics.jar (machine-specific absolute path)
javaaddpath('C:\Users\Ahmad Aziz\Desktop\Papers\Causal_inference\infodynamics-dist-1.2.1/infodynamics.jar');

%% A 2-dimensional time-window of tw+1 consecutive time columns from the cohort
tb = 1;         % the time column at the beginning of the time-window
tw = time-1;    % the window spans columns tb .. tb+tw

% Layer 3 of subject() is the disease indicator and layer 2 the exposure
% indicator, so disease is the source and exposure the destination.
% NOTE(review): earlier comments labelled these the other way round; confirm
% that disease -> exposure is the intended direction for this scenario.
sourceArray0 = subject(:,tb:tb+tw,3);
destArray0   = subject(:,tb:tb+tw,2);

%% Make two long row vectors: transpose so that each column holds one
%% subject's time course, then lay the columns end to end (subject 1's
%% window first, then subject 2's, and so on).
sourceArray1 = sourceArray0';
destArray1   = destArray0';
sourceArray  = reshape(sourceArray1, 1, []);
destArray    = reshape(destArray1, 1, []);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Code from Example 5 - Multivariate transfer entropy on binary data 

%teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', tw2, 1);
%teCalc.initialise();
% We need to construct the joint values of the dest and source before we pass them in,
% and need to use the matrix conversion routine when calling from Matlab/Octave:
%mUtils=javaObject('infodynamics.utils.MatrixUtils');
% teCalc.addObservations(mUtils.computeCombinedValues(octaveToJavaIntMatrix(sourceArray), 2), ...
%		mUtils.computeCombinedValues(octaveToJavaIntMatrix(destArray), 2));

% teCalc.addObservations(mUtils.computeCombinedValues(sourceArray,2), mUtils.computeCombinedValues(destArray,2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Code from Example 1 - Transfer entropy on binary data

% global teCalc

% Discrete transfer-entropy calculator from the JIDT toolkit. The constructor
% arguments are presumably (base, history length): base 2 for binary data and
% a history of tw samples - confirm against the installed JIDT version.
% NOTE(review): sourceArray/destArray are the subjects' windows concatenated
% into one series, so a history of tw samples may span the boundary between
% two consecutive subjects - verify that this is intended.
teCalc=javaObject('infodynamics.measures.discrete.TransferEntropyCalculatorDiscrete', 2, tw);
teCalc.initialise();
% Since we have simple arrays of ints, we can directly pass these in:
teCalc.addObservations(sourceArray, destArray);
% Calculation of the TE:
result = teCalc.computeAverageLocalOfObservations();
fprintf('The transfer entropy is %.4f bits.\n', result);

% Significance of the observed TE against 1000 surrogate measurements.
% The summary statistics are deliberately left unsuppressed so that they are
% printed. They are named nullMean/nullSd rather than mean/sd so that the
% builtin mean() is not shadowed by a workspace variable.
cs = teCalc.computeSignificance(1000);
nullMean = cs.getMeanOfDistribution()
nullSd = cs.getStdOfDistribution()
tscore = cs.getTSscore() % Assuming the distribution is Gaussian, a t-score for our observed measurement
pvalue = cs.pValue
 % % t2=(result2-nullMean)/nullSd


% %%%% Bootstrap confidence intervals %%%%%%%%%%%%%%%%%% 
% global counter btci_dist
% counter = 0;
% h = @aziz_bootstrapci_bin_transferentropy;
% bci = bootci(100,{h,sourceArray', destArray'}, 'alpha', 0.05, 'type', 'bca')

%dist=cs.distribution;

% Correlation coefficients between the flattened source and destination
% series, with their p-values (left unsuppressed so that both are printed).
[correlation,p]=corrcoef(sourceArray,destArray)
%rho=corr(sourceArray',destArray')
%Pearson_corr(s)=pr(1,2);

%%% Cross-section of the cohort at the end of the follow-up period, saved
%%% for a cross-tabulation and the chi-square statistic.
ct = time;                    % last time column
disease  = subject(:,ct,3);   % layer 3: disease indicator
exposure = subject(:,ct,2);   % layer 2: exposure indicator

save('aziz_transferentropy_simulation2.mat', 'exposure', 'disease');


% hist(dist);
% [counts, bins] = hist(dist);
% plot(bins, counts); %# get a line plot of the histogram

% tev is a vector containing the individual transfer entropies